Assignment - 01

Author

Sean Kim

Step 1

library(data.table)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.3     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::between()     masks data.table::between()
✖ dplyr::filter()      masks stats::filter()
✖ dplyr::first()       masks data.table::first()
✖ lubridate::hour()    masks data.table::hour()
✖ lubridate::isoweek() masks data.table::isoweek()
✖ dplyr::lag()         masks stats::lag()
✖ dplyr::last()        masks data.table::last()
✖ lubridate::mday()    masks data.table::mday()
✖ lubridate::minute()  masks data.table::minute()
✖ lubridate::month()   masks data.table::month()
✖ lubridate::quarter() masks data.table::quarter()
✖ lubridate::second()  masks data.table::second()
✖ purrr::transpose()   masks data.table::transpose()
✖ lubridate::wday()    masks data.table::wday()
✖ lubridate::week()    masks data.table::week()
✖ lubridate::yday()    masks data.table::yday()
✖ lubridate::year()    masks data.table::year()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Folder holding the two daily PM2.5 CSV exports. Edit this single line to run
# the analysis from another machine or directory.
data_dir <- "/Users/seankim/Downloads"

# One table per year, read with data.table::fread.
prt.02 <- fread(file.path(data_dir, "ad_viz_plotval_data_2002.csv"))
prt.22 <- fread(file.path(data_dir, "ad_viz_plotval_data_2022.csv"))

dimensions

# Number of rows and columns in the 2002 table.
dim(prt.02)
[1] 15976    20
# Number of rows and columns in the 2022 table.
dim(prt.22)
[1] 57775    20

headers and footers

# First rows of the 2002 table.
head(prt.02)
         Date Source  Site ID POC Daily Mean PM2.5 Concentration    UNITS
1: 01/05/2002    AQS 60010007   1                           25.1 ug/m3 LC
2: 01/06/2002    AQS 60010007   1                           31.6 ug/m3 LC
3: 01/08/2002    AQS 60010007   1                           21.4 ug/m3 LC
4: 01/11/2002    AQS 60010007   1                           25.9 ug/m3 LC
5: 01/14/2002    AQS 60010007   1                           34.5 ug/m3 LC
6: 01/17/2002    AQS 60010007   1                           41.0 ug/m3 LC
   DAILY_AQI_VALUE Site Name DAILY_OBS_COUNT PERCENT_COMPLETE
1:              78 Livermore               1              100
2:              92 Livermore               1              100
3:              71 Livermore               1              100
4:              80 Livermore               1              100
5:              98 Livermore               1              100
6:             115 Livermore               1              100
   AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
1:              88101 PM2.5 - Local Conditions     41860
2:              88101 PM2.5 - Local Conditions     41860
3:              88101 PM2.5 - Local Conditions     41860
4:              88101 PM2.5 - Local Conditions     41860
5:              88101 PM2.5 - Local Conditions     41860
6:              88101 PM2.5 - Local Conditions     41860
                           CBSA_NAME STATE_CODE      STATE COUNTY_CODE  COUNTY
1: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
2: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
3: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
4: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
5: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
6: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
   SITE_LATITUDE SITE_LONGITUDE
1:      37.68753      -121.7842
2:      37.68753      -121.7842
3:      37.68753      -121.7842
4:      37.68753      -121.7842
5:      37.68753      -121.7842
6:      37.68753      -121.7842
# First rows of the 2022 table.
head(prt.22)
         Date Source  Site ID POC Daily Mean PM2.5 Concentration    UNITS
1: 01/01/2022    AQS 60010007   3                           12.7 ug/m3 LC
2: 01/02/2022    AQS 60010007   3                           13.9 ug/m3 LC
3: 01/03/2022    AQS 60010007   3                            7.1 ug/m3 LC
4: 01/04/2022    AQS 60010007   3                            3.7 ug/m3 LC
5: 01/05/2022    AQS 60010007   3                            4.2 ug/m3 LC
6: 01/06/2022    AQS 60010007   3                            3.8 ug/m3 LC
   DAILY_AQI_VALUE Site Name DAILY_OBS_COUNT PERCENT_COMPLETE
1:              52 Livermore               1              100
2:              55 Livermore               1              100
3:              30 Livermore               1              100
4:              15 Livermore               1              100
5:              18 Livermore               1              100
6:              16 Livermore               1              100
   AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
1:              88101 PM2.5 - Local Conditions     41860
2:              88101 PM2.5 - Local Conditions     41860
3:              88101 PM2.5 - Local Conditions     41860
4:              88101 PM2.5 - Local Conditions     41860
5:              88101 PM2.5 - Local Conditions     41860
6:              88101 PM2.5 - Local Conditions     41860
                           CBSA_NAME STATE_CODE      STATE COUNTY_CODE  COUNTY
1: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
2: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
3: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
4: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
5: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
6: San Francisco-Oakland-Hayward, CA          6 California           1 Alameda
   SITE_LATITUDE SITE_LONGITUDE
1:      37.68753      -121.7842
2:      37.68753      -121.7842
3:      37.68753      -121.7842
4:      37.68753      -121.7842
5:      37.68753      -121.7842
6:      37.68753      -121.7842
# Last rows of the 2002 table.
tail(prt.02)
         Date Source  Site ID POC Daily Mean PM2.5 Concentration    UNITS
1: 12/10/2002    AQS 61131003   1                             15 ug/m3 LC
2: 12/13/2002    AQS 61131003   1                             15 ug/m3 LC
3: 12/22/2002    AQS 61131003   1                              1 ug/m3 LC
4: 12/25/2002    AQS 61131003   1                             23 ug/m3 LC
5: 12/28/2002    AQS 61131003   1                              5 ug/m3 LC
6: 12/31/2002    AQS 61131003   1                              6 ug/m3 LC
   DAILY_AQI_VALUE            Site Name DAILY_OBS_COUNT PERCENT_COMPLETE
1:              57 Woodland-Gibson Road               1              100
2:              57 Woodland-Gibson Road               1              100
3:               4 Woodland-Gibson Road               1              100
4:              74 Woodland-Gibson Road               1              100
5:              21 Woodland-Gibson Road               1              100
6:              25 Woodland-Gibson Road               1              100
   AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
1:              88101 PM2.5 - Local Conditions     40900
2:              88101 PM2.5 - Local Conditions     40900
3:              88101 PM2.5 - Local Conditions     40900
4:              88101 PM2.5 - Local Conditions     40900
5:              88101 PM2.5 - Local Conditions     40900
6:              88101 PM2.5 - Local Conditions     40900
                                 CBSA_NAME STATE_CODE      STATE COUNTY_CODE
1: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
2: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
3: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
4: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
5: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
6: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
   COUNTY SITE_LATITUDE SITE_LONGITUDE
1:   Yolo      38.66121      -121.7327
2:   Yolo      38.66121      -121.7327
3:   Yolo      38.66121      -121.7327
4:   Yolo      38.66121      -121.7327
5:   Yolo      38.66121      -121.7327
6:   Yolo      38.66121      -121.7327
# Last rows of the 2022 table.
tail(prt.22)
         Date Source  Site ID POC Daily Mean PM2.5 Concentration    UNITS
1: 12/01/2022    AQS 61131003   1                            3.4 ug/m3 LC
2: 12/07/2022    AQS 61131003   1                            3.8 ug/m3 LC
3: 12/13/2022    AQS 61131003   1                            6.0 ug/m3 LC
4: 12/19/2022    AQS 61131003   1                           34.8 ug/m3 LC
5: 12/25/2022    AQS 61131003   1                           23.2 ug/m3 LC
6: 12/31/2022    AQS 61131003   1                            1.0 ug/m3 LC
   DAILY_AQI_VALUE            Site Name DAILY_OBS_COUNT PERCENT_COMPLETE
1:              14 Woodland-Gibson Road               1              100
2:              16 Woodland-Gibson Road               1              100
3:              25 Woodland-Gibson Road               1              100
4:              99 Woodland-Gibson Road               1              100
5:              74 Woodland-Gibson Road               1              100
6:               4 Woodland-Gibson Road               1              100
   AQS_PARAMETER_CODE       AQS_PARAMETER_DESC CBSA_CODE
1:              88101 PM2.5 - Local Conditions     40900
2:              88101 PM2.5 - Local Conditions     40900
3:              88101 PM2.5 - Local Conditions     40900
4:              88101 PM2.5 - Local Conditions     40900
5:              88101 PM2.5 - Local Conditions     40900
6:              88101 PM2.5 - Local Conditions     40900
                                 CBSA_NAME STATE_CODE      STATE COUNTY_CODE
1: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
2: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
3: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
4: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
5: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
6: Sacramento--Roseville--Arden-Arcade, CA          6 California         113
   COUNTY SITE_LATITUDE SITE_LONGITUDE
1:   Yolo      38.66121      -121.7327
2:   Yolo      38.66121      -121.7327
3:   Yolo      38.66121      -121.7327
4:   Yolo      38.66121      -121.7327
5:   Yolo      38.66121      -121.7327
6:   Yolo      38.66121      -121.7327

Variable names and types

# Column names and storage types of the 2002 table.
str(prt.02)
Classes 'data.table' and 'data.frame':  15976 obs. of  20 variables:
 $ Date                          : chr  "01/05/2002" "01/06/2002" "01/08/2002" "01/11/2002" ...
 $ Source                        : chr  "AQS" "AQS" "AQS" "AQS" ...
 $ Site ID                       : int  60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 ...
 $ POC                           : int  1 1 1 1 1 1 1 1 1 1 ...
 $ Daily Mean PM2.5 Concentration: num  25.1 31.6 21.4 25.9 34.5 41 29.3 15 18.8 37.9 ...
 $ UNITS                         : chr  "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" ...
 $ DAILY_AQI_VALUE               : int  78 92 71 80 98 115 87 57 65 107 ...
 $ Site Name                     : chr  "Livermore" "Livermore" "Livermore" "Livermore" ...
 $ DAILY_OBS_COUNT               : int  1 1 1 1 1 1 1 1 1 1 ...
 $ PERCENT_COMPLETE              : num  100 100 100 100 100 100 100 100 100 100 ...
 $ AQS_PARAMETER_CODE            : int  88101 88101 88101 88101 88101 88101 88101 88101 88101 88101 ...
 $ AQS_PARAMETER_DESC            : chr  "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" ...
 $ CBSA_CODE                     : int  41860 41860 41860 41860 41860 41860 41860 41860 41860 41860 ...
 $ CBSA_NAME                     : chr  "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" ...
 $ STATE_CODE                    : int  6 6 6 6 6 6 6 6 6 6 ...
 $ STATE                         : chr  "California" "California" "California" "California" ...
 $ COUNTY_CODE                   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ COUNTY                        : chr  "Alameda" "Alameda" "Alameda" "Alameda" ...
 $ SITE_LATITUDE                 : num  37.7 37.7 37.7 37.7 37.7 ...
 $ SITE_LONGITUDE                : num  -122 -122 -122 -122 -122 ...
 - attr(*, ".internal.selfref")=<externalptr> 
# Column names and storage types of the 2022 table.
str(prt.22)
Classes 'data.table' and 'data.frame':  57775 obs. of  20 variables:
 $ Date                          : chr  "01/01/2022" "01/02/2022" "01/03/2022" "01/04/2022" ...
 $ Source                        : chr  "AQS" "AQS" "AQS" "AQS" ...
 $ Site ID                       : int  60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 ...
 $ POC                           : int  3 3 3 3 3 3 3 3 3 3 ...
 $ Daily Mean PM2.5 Concentration: num  12.7 13.9 7.1 3.7 4.2 3.8 2.3 6.9 13.6 11.2 ...
 $ UNITS                         : chr  "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" ...
 $ DAILY_AQI_VALUE               : int  52 55 30 15 18 16 10 29 54 47 ...
 $ Site Name                     : chr  "Livermore" "Livermore" "Livermore" "Livermore" ...
 $ DAILY_OBS_COUNT               : int  1 1 1 1 1 1 1 1 1 1 ...
 $ PERCENT_COMPLETE              : num  100 100 100 100 100 100 100 100 100 100 ...
 $ AQS_PARAMETER_CODE            : int  88101 88101 88101 88101 88101 88101 88101 88101 88101 88101 ...
 $ AQS_PARAMETER_DESC            : chr  "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" ...
 $ CBSA_CODE                     : int  41860 41860 41860 41860 41860 41860 41860 41860 41860 41860 ...
 $ CBSA_NAME                     : chr  "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" ...
 $ STATE_CODE                    : int  6 6 6 6 6 6 6 6 6 6 ...
 $ STATE                         : chr  "California" "California" "California" "California" ...
 $ COUNTY_CODE                   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ COUNTY                        : chr  "Alameda" "Alameda" "Alameda" "Alameda" ...
 $ SITE_LATITUDE                 : num  37.7 37.7 37.7 37.7 37.7 ...
 $ SITE_LONGITUDE                : num  -122 -122 -122 -122 -122 ...
 - attr(*, ".internal.selfref")=<externalptr> 

Checking data

# Five-number summary plus mean of daily PM2.5 in 2002, to spot implausible values.
summary(prt.02$`Daily Mean PM2.5 Concentration`)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    7.00   12.00   16.12   20.50  104.30 
# Five-number summary plus mean of daily PM2.5 in 2022, to spot implausible values.
summary(prt.22$`Daily Mean PM2.5 Concentration`)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 -2.200   4.200   7.000   8.573  10.900 302.500 

A minimum PM2.5 concentration of -2.2 is not physically plausible, so I will subset prt.22 to keep only observations with PM2.5 >= 0.

# Keep only the 2022 rows whose daily PM2.5 reading is non-negative.
prt.22 <- prt.22[`Daily Mean PM2.5 Concentration` >= 0]

Step 2: Combine Data

# Tag each yearly table with its year (the column is added by reference, so
# prt.02 and prt.22 themselves gain a `year` column), then stack them.
prt.02[, year := 2002]
prt.22[, year := 2022]
data_combined <- rbindlist(list(prt.02, prt.22))

# Shorter names for the columns used throughout the rest of the analysis.
setnames(
  data_combined,
  old = c("Daily Mean PM2.5 Concentration", "SITE_LATITUDE", "SITE_LONGITUDE", "Site Name"),
  new = c("PM2.5", "Lat", "Lon", "SiteName")
)
head(data_combined)
         Date Source  Site ID POC PM2.5    UNITS DAILY_AQI_VALUE  SiteName
1: 01/05/2002    AQS 60010007   1  25.1 ug/m3 LC              78 Livermore
2: 01/06/2002    AQS 60010007   1  31.6 ug/m3 LC              92 Livermore
3: 01/08/2002    AQS 60010007   1  21.4 ug/m3 LC              71 Livermore
4: 01/11/2002    AQS 60010007   1  25.9 ug/m3 LC              80 Livermore
5: 01/14/2002    AQS 60010007   1  34.5 ug/m3 LC              98 Livermore
6: 01/17/2002    AQS 60010007   1  41.0 ug/m3 LC             115 Livermore
   DAILY_OBS_COUNT PERCENT_COMPLETE AQS_PARAMETER_CODE       AQS_PARAMETER_DESC
1:               1              100              88101 PM2.5 - Local Conditions
2:               1              100              88101 PM2.5 - Local Conditions
3:               1              100              88101 PM2.5 - Local Conditions
4:               1              100              88101 PM2.5 - Local Conditions
5:               1              100              88101 PM2.5 - Local Conditions
6:               1              100              88101 PM2.5 - Local Conditions
   CBSA_CODE                         CBSA_NAME STATE_CODE      STATE
1:     41860 San Francisco-Oakland-Hayward, CA          6 California
2:     41860 San Francisco-Oakland-Hayward, CA          6 California
3:     41860 San Francisco-Oakland-Hayward, CA          6 California
4:     41860 San Francisco-Oakland-Hayward, CA          6 California
5:     41860 San Francisco-Oakland-Hayward, CA          6 California
6:     41860 San Francisco-Oakland-Hayward, CA          6 California
   COUNTY_CODE  COUNTY      Lat       Lon year
1:           1 Alameda 37.68753 -121.7842 2002
2:           1 Alameda 37.68753 -121.7842 2002
3:           1 Alameda 37.68753 -121.7842 2002
4:           1 Alameda 37.68753 -121.7842 2002
5:           1 Alameda 37.68753 -121.7842 2002
6:           1 Alameda 37.68753 -121.7842 2002

Step 3: Basic Map

library(leaflet)

# One row per monitoring site and year. data_combined holds one row per daily
# observation, so mapping it directly stacks a marker for every measurement on
# the same coordinates and makes the widget needlessly heavy.
site_locations <- unique(data_combined[, .(SiteName, Lat, Lon, year)])

leaflet(site_locations) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~Lon,
    lat = ~Lat,
    radius = 1,
    color = ~ifelse(year == 2002, "red", "yellow"),
    weight = 2,
    # Markers are no longer stacked, so each one has to be visible on its own.
    opacity = 0.8,
    popup = ~SiteName,
    # `label` is per-marker hover text, so it carries the site name.
    label = ~SiteName
  ) %>%
  # The colour key for the two years lives in a legend rather than in `label`.
  addLegend(
    position = "bottomleft",
    colors = c("red", "yellow"),
    labels = c("2002", "2022"),
    title = "Sites measured in 2002 (red) and 2022 (yellow)",
    opacity = 1
  )

Markers are highly concentrated in the major regions of California - Sacramento, the Bay Area, and Los Angeles/San Diego. There is also significant coverage of the rest of the state, with sites distributed throughout. There appear to be more sites in 2022 than in 2002, as there are many more yellow markers than red ones.

Step 4: Checking for missing/implausible values of PM2.5 in combined dataset.

# Count missing PM2.5 values. The full column name is spelled out: `$PM` only
# resolves through partial matching, and a name that matched nothing would
# yield NULL and report 0 missing values regardless of the data.
sum(is.na(data_combined$PM2.5))
[1] 0
# First PM2.5 values of the combined table (2002 rows come first).
head(data_combined$PM2.5)
[1] 25.1 31.6 21.4 25.9 34.5 41.0
# Last PM2.5 values of the combined table (2022 rows come last).
tail(data_combined$PM2.5)
[1]  3.4  3.8  6.0 34.8 23.2  1.0
# Range check on the combined PM2.5 values after the negative readings were dropped.
summary(data_combined$PM2.5)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    4.60    7.70   10.24   12.50  302.50 

No missing PM2.5 values were found in the combined dataset. The implausible negative readings were removed from the 2022 data before combining, so PM2.5 now ranges from 0 to 302.5.

Step 5: 3 different spatial levels for comparing daily concentrations of PM2.5 in CA from 2002 to 2022.

State-wide Data:

library(ggplot2)

# State-wide mean and standard deviation of daily PM2.5 for each year.
average_pm_by_year <- summarize(
  group_by(data_combined, year),
  Average_PM = mean(PM2.5, na.rm = TRUE),
  SD_PM = sd(PM2.5, na.rm = TRUE)
)

# One bar per year, with error bars spanning one standard deviation either
# side of the mean.
ggplot(data = average_pm_by_year, mapping = aes(x = as.factor(year), y = Average_PM)) +
  geom_col(fill = "blue") +
  geom_errorbar(
    mapping = aes(ymin = Average_PM - SD_PM, ymax = Average_PM + SD_PM),
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    title = "Average PM2.5 Level in California by Year (2002-2022)",
    x = "Year",
    y = "Average PM2.5 Level"
  )

# Unpaired two-sample t-test on daily PM2.5: 2002 table versus 2022 table.
t_test_state <- t.test(
  x = prt.02$`Daily Mean PM2.5 Concentration`,
  y = prt.22$`Daily Mean PM2.5 Concentration`,
  paired = FALSE
)
print(t_test_state)

    Welch Two Sample t-test

data:  prt.02$`Daily Mean PM2.5 Concentration` and prt.22$`Daily Mean PM2.5 Concentration`
t = 65.583, df = 18898, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 7.283791 7.732587
sample estimates:
mean of x mean of y 
16.115943  8.607754 

On the state-wide level, the average PM2.5 concentration decreased from 2002 to 2022 (16.12 vs. 8.61 ug/m3), and that decrease was statistically significant (Welch two-sample t-test, p < 2.2e-16).

County-wide data

# County-level summaries, one table per year: mean and SD of daily PM2.5, plus
# the mean latitude/longitude over the county's observations (used later to
# place one map marker per county).
average_pm_by_county_02 <- data_combined[year == 2002] %>%
  group_by(COUNTY) %>%
  summarize(
    Average_PM_2002 = mean(PM2.5, na.rm = TRUE),
    SD_PM_2002 = sd(PM2.5, na.rm = TRUE),
    Year = mean(year),
    Lat = mean(Lat),
    Lon = mean(Lon)
  )

average_pm_by_county_22 <- data_combined[year == 2022] %>%
  group_by(COUNTY) %>%
  summarize(
    Average_PM_2022 = mean(PM2.5, na.rm = TRUE),
    SD_PM_2022 = sd(PM2.5, na.rm = TRUE),
    Year = mean(year),
    Lat = mean(Lat),
    Lon = mean(Lon)
  )

# Stack the two yearly county summaries by column position. Their value columns
# are named per year (Average_PM_2002 vs Average_PM_2022), so name matching is
# switched off explicitly instead of leaning on rbindlist's legacy fallback,
# which binds the same way but emits a message. The result keeps the 2002
# column names for both years; use the Year column to tell the rows apart.
County_mean <- rbindlist(
  list(average_pm_by_county_02, average_pm_by_county_22),
  use.names = FALSE
)

# Colour scale over the county means of both years combined.
# NOTE(review): color_palette is not referenced by the county maps in this
# document, which build their own per-year palettes instead.
color_palette <- colorNumeric(
  palette = "viridis",
  domain = County_mean$Average_PM_2002
)

# Colour ramp (green -> gold -> brown) scaled to the 2002 county means only.
# NOTE(review): the 2022 county map uses its own scale, so colours are not
# directly comparable between the two maps.
temp.pal02 <- colorNumeric(
  palette = c('darkgreen','goldenrod','brown'),
  domain = average_pm_by_county_02$Average_PM_2002
)

# One circle per county at its averaged coordinates, coloured by 2002 mean PM2.5.
PMmap02 <- leaflet(average_pm_by_county_02) %>%
  addProviderTiles('CartoDB.Positron') %>%
  addCircles(
    lng = ~Lon,
    lat = ~Lat,
    label = ~paste0(round(Average_PM_2002, 2), ' PM2.5'),
    color = ~temp.pal02(Average_PM_2002),
    opacity = 1,
    fillOpacity = 1,
    radius = 500
  ) %>%
  addLegend(
    position = 'bottomleft',
    pal = temp.pal02,
    values = ~Average_PM_2002,
    title = 'Mean Concentrations PM2.5 in 2002',
    opacity = 1
  )
PMmap02
# Colour ramp (green -> gold -> brown) scaled to the 2022 county means only.
# NOTE(review): the 2002 county map uses its own scale, so colours are not
# directly comparable between the two maps.
temp.pal22 <- colorNumeric(
  palette = c('darkgreen','goldenrod','brown'),
  domain = average_pm_by_county_22$Average_PM_2022
)

# One circle per county at its averaged coordinates, coloured by 2022 mean PM2.5.
PMmap22 <- leaflet(average_pm_by_county_22) %>%
  addProviderTiles('CartoDB.Positron') %>%
  addCircles(
    lng = ~Lon,
    lat = ~Lat,
    label = ~paste0(round(Average_PM_2022, 2), ' PM2.5'),
    color = ~temp.pal22(Average_PM_2022),
    opacity = 1,
    fillOpacity = 1,
    radius = 500
  ) %>%
  addLegend(
    position = 'bottomleft',
    pal = temp.pal22,
    values = ~Average_PM_2022,
    title = 'Mean Concentrations PM2.5 in 2022',
    opacity = 1
  )
PMmap22

Overall, county-level average PM2.5 concentrations decreased from 2002 to 2022.

Site-specific data

# Site-level summaries, one table per year: mean and SD of daily PM2.5, plus
# the mean latitude/longitude over each site's observations.
# NOTE(review): rows are grouped by SiteName alone, so monitors that share a
# name (or have a blank name) would be pooled; `Site ID` may be the safer key -
# confirm against the data.
average_pm_by_site_02 <- data_combined[year == 2002] %>%
  group_by(SiteName) %>%
  summarize(
    Average_PM_2002_site = mean(PM2.5, na.rm = TRUE),
    SD_PM_2002_site = sd(PM2.5, na.rm = TRUE),
    Year = mean(year),
    Lat = mean(Lat),
    Lon = mean(Lon)
  )

average_pm_by_site_22 <- data_combined[year == 2022] %>%
  group_by(SiteName) %>%
  summarize(
    Average_PM_2022_site = mean(PM2.5, na.rm = TRUE),
    SD_PM_2022_site = sd(PM2.5, na.rm = TRUE),
    Year = mean(year),
    Lat = mean(Lat),
    Lon = mean(Lon)
  )

# Stack the two yearly site summaries by column position. Their value columns
# are named per year (Average_PM_2002_site vs Average_PM_2022_site), so name
# matching is switched off explicitly instead of leaning on rbindlist's legacy
# fallback, which binds the same way but emits a message. The result keeps the
# 2002 column names for both years; use the Year column to tell the rows apart.
Site_mean <- rbindlist(
  list(average_pm_by_site_02, average_pm_by_site_22),
  use.names = FALSE
)

# Colour scale over the site means of both years combined.
# NOTE(review): color_palette is not referenced by the site maps in this
# document, which build their own per-year palettes instead.
color_palette <- colorNumeric(
  palette = "viridis",
  domain = Site_mean$Average_PM_2002_site
)

# Colour ramp (green -> gold -> brown) scaled to the 2002 site means only.
# NOTE(review): the 2022 site map uses its own scale, so colours are not
# directly comparable between the two maps.
temp.pal02.s <- colorNumeric(
  palette = c('darkgreen','goldenrod','brown'),
  domain = average_pm_by_site_02$Average_PM_2002_site
)

# One circle per site name, coloured by its 2002 mean PM2.5.
PMmap02.s <- leaflet(average_pm_by_site_02) %>%
  addProviderTiles('CartoDB.Positron') %>%
  addCircles(
    lng = ~Lon,
    lat = ~Lat,
    label = ~paste0(round(Average_PM_2002_site, 2), ' PM2.5'),
    color = ~temp.pal02.s(Average_PM_2002_site),
    opacity = 1,
    fillOpacity = 1,
    radius = 500
  ) %>%
  addLegend(
    position = 'bottomleft',
    pal = temp.pal02.s,
    values = ~Average_PM_2002_site,
    title = 'Mean Concentrations PM2.5 by site in 2002',
    opacity = 1
  )
PMmap02.s
# Colour ramp (green -> gold -> brown) scaled to the 2022 site means only.
# NOTE(review): the 2002 site map uses its own scale, so colours are not
# directly comparable between the two maps.
temp.pal22.s <- colorNumeric(
  palette = c('darkgreen','goldenrod','brown'),
  domain = average_pm_by_site_22$Average_PM_2022_site
)

# One circle per site name, coloured by its 2022 mean PM2.5.
PMmap22.s <- leaflet(average_pm_by_site_22) %>%
  addProviderTiles('CartoDB.Positron') %>%
  addCircles(
    lng = ~Lon,
    lat = ~Lat,
    label = ~paste0(round(Average_PM_2022_site, 2), ' PM2.5'),
    color = ~temp.pal22.s(Average_PM_2022_site),
    opacity = 1,
    fillOpacity = 1,
    radius = 500
  ) %>%
  addLegend(
    position = 'bottomleft',
    pal = temp.pal22.s,
    values = ~Average_PM_2022_site,
    title = 'Mean Concentrations PM2.5 by site in 2022',
    opacity = 1
  )
PMmap22.s

Overall, site-level average PM2.5 concentrations decreased from 2002 to 2022.